library(RColorBrewer); #brewer.pal(n=8, name = "Dark2") #;display.brewer.pal(n = 8, name = "Dark2")
library(tidyverse); theme_set(theme_classic()) #ggplot2, dplyr, tibble, etc.
library(plotly) #use ggplotly() for interactive plots with scoll-over IDs
library(knitr) #use kable() to make formatted tables
library(kableExtra)
library(Rtsne)
library(glmmTMB)
library(lme4)
library(sjPlot)
library(ggpubr) #ggarrange() figure wraps
library(plyr)
library(ggeffects) #ggpredict
library(rstanarm) #stan models
library(shinystan) #stan model evaluation >launch_shinystan_demo()
library(loo) #loo() to compare fits between bayesian models
library(MuMIn) #dredge, model averaging
library(AICcmodavg) #aictab() for AICc model comparisons
library(coin) #permutation test for ratios with independence_test()
library(janitor) #get_dupes() finds and prints duplicates
Replace zeros with NAs, sort and filter by age (23-27)
mc=tibble(read.csv("CB-mouthColor.csv", h=T)) #N=225
#replace zeros with NAs in morphometrics
mc[mc==0] <- NA
#sort by year > family > id
mc <- mc %>% arrange(year, family, id) #N=225
#filter to include only ages 23-27
mc <- mc %>% filter(between(age,23,27)) #N=141
mc <- mc %>% drop_na(sex) #N=138
Identify duplicate records in mc
mc %>% get_dupes(id)
## # A tibble: 8 x 67
## id dupe_count row family year age sex tongY1 palY1 meanY1 tongM1
## <chr> <int> <int> <chr> <int> <int> <chr> <int> <int> <int> <int>
## 1 42 ROWA04 2 118 ROWA04 2004 24 F 60 50 55 80
## 2 42 ROWA04 2 137 ROWA04 2004 24 F 60 50 55 70
## 3 53 ROWA04 2 119 ROWA04 2004 24 F 60 50 55 70
## 4 53 ROWA04 2 138 ROWA04 2004 24 F 50 30 40 60
## 5 64 ROWA04 2 120 ROWA04 2004 24 F 70 40 55 80
## 6 64 ROWA04 2 139 ROWA04 2004 24 F 60 40 50 60
## 7 75 ROWA04 2 121 ROWA04 2004 24 F 60 40 50 70
## 8 75 ROWA04 2 140 ROWA04 2004 24 F 50 50 50 50
## # ... with 56 more variables: palM1 <int>, meanM1 <dbl>, sat1 <dbl>,
## # hue1 <dbl>, tongY2 <int>, palY2 <int>, meanY2 <int>, tongM2 <int>,
## # palM2 <int>, meanM2 <dbl>, sat2 <dbl>, hue2 <dbl>, bodTemp1 <dbl>,
## # bodTemp2 <dbl>, diffBodTemp <dbl>, ambTemp1 <dbl>, ambTemp2 <dbl>,
## # ambFinal <dbl>, broodSize <int>, bandTime1 <chr>, bandTime2 <chr>,
## # handleTime <int>, bandOrder <int>, time1 <chr>, time2 <chr>,
## # obtainTime <chr>, date <chr>, julDate <int>, mass <int>, tailLength <int>,
## # tarsus <dbl>, wingChord <int>, sevenPrim <dbl>, expSevenPrim <dbl>,
## # billNT <dbl>, billWidth <dbl>, billDepth <dbl>, TEC <dbl>, head <dbl>,
## # skull <dbl>, timeOut1 <int>, timeOut2 <int>, obt1Max <dbl>, obt1Min <dbl>,
## # obt1Max.1 <dbl>, obt1Min.1 <dbl>, t1MaxMinusHour <dbl>,
## # t1MinMinusHour <dbl>, t1MeanMinusHour <dbl>, t1MaxAtBand <dbl>,
## # t1MinAtBand <dbl>, t1MeanAtBand <dbl>, futureBreeder <int>,
## # broodSize.1 <int>, familyProducedBreeder <int>, comments <chr>
For these duplicate records, one record for each individual is missing time 2 measurements. Therefore, I am keeping only the complete record for each individual. Removed = 42 ROWA04 (row 137), 53 ROWA04 (row 138), 64 ROWA04 (row 139), 75 ROWA04 (row 140)
mc <- mc[-c(62,67,72,78),]#N=134
#mc %>% get_dupes() #double check...yep, no duplicates
Create subsets of mc for females (N=80) and males (N=54) only
#females only
female.mc <- mc %>%
filter(sex == "F") #N=80
#males only
male.mc <- mc %>%
filter(sex == "M") #N=54
Nestling morphometrics are being used here to boost sample size for calculating mass/tarsus residuals
Replace zeros with NAs, sort and filter by age (23-27)
nm=tibble(read.csv("nestlingMorph.csv", h=T))
#replace zeros with NAs in morphometrics
nm[nm==0] <- NA
#remove nestlingMeasurementNumber
nm <- nm[,-1]
#sort by year > nestName > id
nm <- nm %>% arrange(year, nestName ,id)
#filter to include only ages 23-27
nm <- nm %>% filter(between(age, 23, 27))
Create subset of nm including only sexed birds (N=503; females=268; males=235)
#subset nm to include only sexed birds and remove NAs from mass & tarsus
sexed.nm <- nm %>% drop_na(sex, tarsus, mass) #N=503
#make sex a factor rather than character
sexed.nm$sex <- as.factor(sexed.nm$sex)
#females only
female.nm <- sexed.nm %>%
filter(sex == "F") #N=268
#males only
male.nm <- sexed.nm %>%
filter(sex == "M") #N=235
Only includes sexed individuals (N=503)
sexed.nm %>%
ggplot(aes(mass, fill=sex))+
geom_histogram(binwidth = 10)+
scale_fill_manual(values = c("#1B9E77", "#D73027"),
name = "",
labels = c("Female","Male"))+
annotate(geom = "text", x=375, y=29, label="N=268")+
annotate(geom = "text", x=390, y=5, label="N=235")
Only includes sexed individuals (N=503)
sexed.nm %>%
ggplot(aes(tarsus, fill=sex))+
geom_histogram(binwidth = 0.5)+
scale_fill_manual(values = c("#1B9E77", "#D73027"),
name = "",
labels = c("Female","Male"))+
annotate(geom = "text", x=59, y=28, label="N=268")+
annotate(geom = "text", x=60, y=5, label="N=235")
sexed.nm %>%
ggplot(aes(x=tarsus,y=mass,color=sex))+
geom_point(alpha=0.5)+
geom_smooth()+
scale_color_manual(values = c("#1B9E77", "#D73027"),
name = "",
labels = c("Female","Male"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
femaleIndex.lm <- lm(mass~tarsus, female.nm)
summary(femaleIndex.lm)
##
## Call:
## lm(formula = mass ~ tarsus, data = female.nm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -106.011 -16.793 2.398 18.162 96.341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -400.1482 39.9747 -10.01 <2e-16 ***
## tarsus 13.0989 0.6907 18.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.09 on 266 degrees of freedom
## Multiple R-squared: 0.5748, Adjusted R-squared: 0.5732
## F-statistic: 359.6 on 1 and 266 DF, p-value: < 2.2e-16
Check residual distributions
*Q-Q plot looks a little non-normal, probably seeing the effects of reduced sample size by separating sexes.
plot(femaleIndex.lm)
female.nm %>%
ggplot(aes(x=tarsus, y=mass, color=as.factor(age)))+
geom_point(alpha = 0.5)+
geom_smooth(aes(group=1))+
scale_color_brewer(palette = "Set1")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
maleIndex.lm <- lm(mass~tarsus, male.nm)
summary(maleIndex.lm)
##
## Call:
## lm(formula = mass ~ tarsus, data = male.nm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -112.780 -23.824 1.778 26.411 118.573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -367.7149 50.4700 -7.286 4.91e-12 ***
## tarsus 12.6580 0.8477 14.932 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.67 on 233 degrees of freedom
## Multiple R-squared: 0.489, Adjusted R-squared: 0.4868
## F-statistic: 223 on 1 and 233 DF, p-value: < 2.2e-16
Plot residuals
plot(maleIndex.lm)
male.nm %>%
ggplot(aes(x=tarsus, y=mass, color=as.factor(age)))+
geom_point(alpha = 0.5)+
geom_smooth(aes(group=1))+
scale_color_brewer(palette = "Set1")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Create dataframes containing id and residuals
#male residuals pulled from lm() output
male.nm$resids <- maleIndex.lm$residuals
#female residuals pulled from lm() output
female.nm$resids <- femaleIndex.lm$residuals
#dataframes containing male and female ids and residuals for the join with mc
joinMaleResidual <- data.frame(id = male.nm$id,
resids = male.nm$resids)
joinFemaleResidual <- data.frame(id = female.nm$id,
resids = female.nm$resids)
Join residuals with male and female mouth color data by common id
#left join with female mc
newFemale.mc <- left_join(female.mc,joinFemaleResidual, by="id")
female.mc$resids <- newFemale.mc$resids
#left join with male mc
newMale.mc <- left_join(male.mc,joinMaleResidual, by="id")
male.mc$resids <- newMale.mc$resids
#left join with both sexes (full mc)
bothSexResid <- data.frame(rbind(joinMaleResidual,joinFemaleResidual))
new.mc <- left_join(mc,bothSexResid, by="id")
mc$resids <- new.mc$resids
Distribution of residuals for both sexes combined
mc %>%
ggplot(aes(x=resids, fill=sex))+
geom_histogram(binwidth = 10)+
scale_fill_manual(values = c("#1B9E77", "#D73027"),
name = "",
labels = c("Female","Male"))
sexed.nm$ratio <- sexed.nm$mass/sexed.nm$tarsus
independence_test(ratio~sex, data = sexed.nm)
##
## Asymptotic General Independence Test
##
## data: ratio by sex (F, M)
## Z = -4.6465, p-value = 3.376e-06
## alternative hypothesis: two.sided
This result doesn’t indicate the direction of the relationship, but simply indicates that the female ratio is statistically different than the male ratio. This suggests that modeling male and female mouth color separately is likely best.
Plotting mass/tarsus ratio by sex
sexed.nm %>%
ggplot(aes(x=sex, y=ratio, color=sex))+
geom_boxplot()+
ylab("Mass/tarsus ratio")+
scale_color_manual(values = c("#1B9E77", "#D73027"),
name = "",
labels = c("Female","Male"))+
geom_jitter(alpha=0.2)
Examine whether brood size affects body temp
Plotting body temp by brood size
bodyTempByBroodSize.plot <- mc %>%
ggplot(aes(x=broodSize,y=bodTemp1,
group=as.factor(broodSize), color=as.factor(broodSize),
label=id))+
geom_boxplot()+
geom_jitter()+
scale_color_brewer(palette = "Dark2")
ggplotly(bodyTempByBroodSize.plot)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
Contrary to our expectation, it looks like the largest broods (6 nestlings) is significantly lower in body temp on average, but it’s the smallest sample. Probably shouldn’t try to infer much, but at least we aren’t seeing big differences across brood sizes on the whole.
summary(lm(bodTemp1~as.factor(broodSize), data = mc))
##
## Call:
## lm(formula = bodTemp1 ~ as.factor(broodSize), data = mc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.16111 -0.57188 0.04412 0.54412 2.02812
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.61667 0.37826 107.379 < 2e-16 ***
## as.factor(broodSize)2 -0.55556 0.43677 -1.272 0.20571
## as.factor(broodSize)3 0.23922 0.41028 0.583 0.56089
## as.factor(broodSize)4 0.01754 0.40702 0.043 0.96569
## as.factor(broodSize)5 -0.24479 0.41219 -0.594 0.55365
## as.factor(broodSize)6 -1.51667 0.56104 -2.703 0.00781 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9265 on 127 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1474, Adjusted R-squared: 0.1139
## F-statistic: 4.393 on 5 and 127 DF, p-value: 0.001009
mc$ratio <- mc$mass/mc$tarsus
Distribution of mass/tarsus ratio for both sexes combined
mc %>%
ggplot(aes(x=ratio, fill=sex))+
geom_histogram(binwidth = 0.4)+
scale_fill_manual(values = c("#1B9E77", "#D73027"),
name = "",
labels = c("Female","Male"))
#hist(mc$ratio)
sat1Ratio.mdl <- lmer(sat1 ~ age + ratio + sex +
bodTemp1 + ambTemp1 + timeOut1 + (1|family),
data = mc, REML = FALSE)
summary(sat1Ratio.mdl)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: sat1 ~ age + ratio + sex + bodTemp1 + ambTemp1 + timeOut1 + (1 |
## family)
## Data: mc
##
## AIC BIC logLik deviance df.resid
## 833.4 857.0 -407.7 815.4 93
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4104 -0.5758 0.1540 0.5680 2.1224
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 127.5 11.29
## Residual 107.2 10.35
## Number of obs: 102, groups: family, 34
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 159.9507 71.5024 2.237
## age -3.4562 1.6174 -2.137
## ratio 4.9085 2.4057 2.040
## sexM -4.0191 2.6334 -1.526
## bodTemp1 0.2575 1.7540 0.147
## ambTemp1 0.2734 0.5476 0.499
## timeOut1 0.1417 0.1516 0.935
##
## Correlation of Fixed Effects:
## (Intr) age ratio sexM bdTmp1 ambTm1
## age -0.427
## ratio 0.101 -0.098
## sexM -0.153 -0.037 -0.223
## bodTemp1 -0.795 -0.156 -0.261 0.198
## ambTemp1 0.134 0.225 0.048 0.061 -0.423
## timeOut1 -0.074 -0.123 0.095 0.003 0.100 -0.114
Plot ratio model
plot_model(sat1Ratio.mdl,
title = "Mass/tarsus ratio model for saturation 1",
bpe = "median",
vline.color = "black",
show.values = TRUE,
value.offset = .3,
line.size = 1.2)
sat1Residual.mdl <- lmer(sat1 ~ age + resids + sex +
bodTemp1 + ambTemp1 + timeOut1 + (1|family),
data = mc, REML = FALSE)
summary(sat1Residual.mdl)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: sat1 ~ age + resids + sex + bodTemp1 + ambTemp1 + timeOut1 +
## (1 | family)
## Data: mc
##
## AIC BIC logLik deviance df.resid
## 833.8 857.4 -407.9 815.8 93
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.44228 -0.52381 0.09588 0.61258 2.22548
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 145.9 12.08
## Residual 103.0 10.15
## Number of obs: 102, groups: family, 34
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 172.10735 73.06992 2.355
## age -3.76360 1.70386 -2.209
## resids 0.08698 0.04909 1.772
## sexM -2.19575 2.55334 -0.860
## bodTemp1 1.03847 1.67983 0.618
## ambTemp1 -0.02192 0.56075 -0.039
## timeOut1 0.12343 0.15371 0.803
##
## Correlation of Fixed Effects:
## (Intr) age resids sexM bdTmp1 ambTm1
## age -0.471
## resids 0.177 -0.175
## sexM -0.105 -0.081 0.124
## bodTemp1 -0.781 -0.164 -0.059 0.139
## ambTemp1 0.086 0.246 -0.117 0.056 -0.408
## timeOut1 -0.086 -0.108 0.016 0.035 0.132 -0.105
Plot residual model
plot_model(sat1Residual.mdl,
title = "Mass/tarsus residual model for saturation 1",
bpe = "median",
vline.color = "black",
show.values = TRUE,
value.offset = .3,
line.size = 1.2)
Now let’s compare the fit of ratio and residual models
ratioOrResid.mdls <- c(sat1Ratio.mdl, sat1Residual.mdl)
ratioOrResid.modnames <- c("Ratio", "Residual")
confset(ratioOrResid.mdls, modnames = ratioOrResid.modnames, method = "ordinal")
##
## Confidence set for the best model
##
## Method: ordinal ranking based on delta AIC
##
## Models with substantial weight:
## K AICc Delta_AICc AICcWt
## Ratio 9 835.33 0.00 0.56
## Residual 9 835.78 0.45 0.44
##
##
## Models with some weight:
## K AICc Delta_AICc AICcWt
##
##
## Models with little weight:
## K AICc Delta_AICc AICcWt
##
##
## Models with no weight:
## K AICc Delta_AICc AICcWt
Using AICc Weight (AICcWt), the confset() function calculates the probability of each model (i.e. each hypothesis) given the data and the other candidate models. So in this case, we have more confidence in the ratio models ability to explain the variation in the data, even though their AICc values are only slightly different.
sat1NoSex.mdl <- lmer(sat1 ~ age + resids + bodTemp1 + ambTemp1 + timeOut1 + (1|family),
data = mc, REML = FALSE)
sat1NoSex.mdl2 <- lmer(sat1 ~ age + resids + bodTemp1 * ambTemp1 + (1|family),
data = mc, REML = FALSE)
sat1NoSex.mdl3 <- lmer(sat1 ~ age + resids + bodTemp1 + (1|family),
data = mc, REML = FALSE)
In the following model, we’re using the difference between body temp and 41 degrees, which is the mean body temp for birds > 28 days old. We chose this age range since these mature nestlings should have had an easier time maintaining an ‘ideal’ body temp.
#calculate the difference between each birds body temp and 41 degrees (mean of birds >28 days of age)
mc$tempDiff <- mc$bodTemp1-41
#model including tempDiff
diffTemp.mdl <- lmer(sat1 ~ age + resids + tempDiff + (1|family), data = mc, REML = FALSE)
summary(diffTemp.mdl)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: sat1 ~ age + resids + tempDiff + (1 | family)
## Data: mc
##
## AIC BIC logLik deviance df.resid
## 1077.2 1094.6 -532.6 1065.2 127
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2973 -0.5197 0.0412 0.5876 2.1990
##
## Random effects:
## Groups Name Variance Std.Dev.
## family (Intercept) 115.0 10.72
## Residual 113.3 10.65
## Number of obs: 133, groups: family, 44
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 219.14291 35.97746 6.091
## age -3.90227 1.41763 -2.753
## resids 0.10560 0.03517 3.003
## tempDiff 0.80103 1.27874 0.626
##
## Correlation of Fixed Effects:
## (Intr) age resids
## age -0.998
## resids 0.069 -0.069
## tempDiff 0.061 -0.043 -0.146
plot_model(diffTemp.mdl,
title = "Both sexes saturation 1",
bpe = "median",
vline.color = "black",
show.values = TRUE,
value.offset = .3,
line.size = 1.2)
# AIC(sat1Sex.mdl, sat1NoSex.mdl)
# sexOrNo <- c(sat1Sex.mdl, sat1NoSex.mdl, ratioSex.mdl, sat1NoSex.mdl2, sat1NoSex.mdl3)
# modname <- c("sex", "no", "ratio", "noTimeOut", "noAmbTemp")
# confset(sexOrNo, modnames = modname, method = "ordinal")
hue1.mdl <- lmer(hue1*100 ~ age + ratio + sex + bodTemp1 + ambTemp1 + timeOut1 + (1|family),
data = mc, REML = FALSE)
plot_model(hue1.mdl,
title = "Ratio hue 1",
bpe = "median",
vline.color = "black",
show.values = TRUE,
value.offset = .3,
line.size = 1.2)